msg_tool\scripts\artemis\ast/
parser.rs

1use super::types::*;
2use crate::types::*;
3use crate::utils::encoding::*;
4use crate::utils::escape::unescape_lua_str;
5use anyhow::Result;
6
7/// A parser for Artemis AST scripts.
8pub struct Parser<'a> {
9    str: &'a [u8],
10    pos: usize,
11    len: usize,
12    line: usize,
13    line_index: usize,
14    encoding: Encoding,
15}
16
17impl<'a> Parser<'a> {
18    /// Creates a new parser for the given string with the specified encoding.
19    ///
20    /// * `str` - The string to parse.
21    /// * `encoding` - The encoding of the string.
22    pub fn new<S: AsRef<[u8]> + ?Sized>(str: &'a S, encoding: Encoding) -> Self {
23        let str = str.as_ref();
24        Parser {
25            str,
26            pos: 0,
27            len: str.len(),
28            line: 1,
29            line_index: 1,
30            encoding,
31        }
32    }
33
34    /// Checks if input is a valid header for an AST file.
35    pub fn try_parse_header(mut self) -> Result<()> {
36        self.erase_whitespace();
37        if self.is_indent(b"astver") {
38            self.parse_indent(b"astver")?;
39            self.parse_equal()?;
40            self.parse_f64()?;
41        } else if self.is_indent(b"astname") {
42            self.parse_indent(b"astname")?;
43            self.parse_equal()?;
44        } else if self.is_indent(b"ast") {
45            self.parse_indent(b"ast")?;
46            self.parse_equal()?;
47        } else {
48            return self.error("expected 'astver', 'astname' or 'ast'");
49        }
50        Ok(())
51    }
52
53    /// Parses the AST file and returns an [AstFile] object.
54    pub fn parse(mut self) -> Result<AstFile> {
55        self.erase_whitespace();
56        let astver = if self.is_indent(b"astver") {
57            self.parse_indent(b"astver")?;
58            self.parse_equal()?;
59            Some(self.parse_f64()?)
60        } else {
61            None
62        };
63        self.erase_whitespace();
64        let mut astname = None;
65        if self.is_indent(b"astname") {
66            self.parse_indent(b"astname")?;
67            self.parse_equal()?;
68            astname = Some(self.parse_any_str()?.to_string());
69            self.erase_whitespace();
70        }
71        self.parse_indent(b"ast")?;
72        self.parse_equal()?;
73        let ast = self.parse_value()?;
74        Ok(AstFile {
75            astver,
76            astname,
77            ast,
78        })
79    }
80
81    fn parse_equal(&mut self) -> Result<()> {
82        self.erase_whitespace();
83        match self.next() {
84            Some(b'=') => Ok(()),
85            _ => self.error("expected '='"),
86        }
87    }
88
89    fn parse_value(&mut self) -> Result<Value> {
90        self.erase_whitespace();
91        match self.peek() {
92            Some(t) => match t {
93                b'"' => return self.parse_str().map(|x| Value::Str(x.to_string())),
94                b'[' => {
95                    self.eat_char();
96                    match self.peek().ok_or(self.error2("unexpected eof"))? {
97                        b'[' => {
98                            self.pos -= 1; // Rewind to the first '['
99                            self.parse_raw_str().map(|x| Value::Str(x))
100                        }
101                        _ => {
102                            self.pos -= 1;
103                            self.parse_key_val()
104                        }
105                    }
106                }
107                b'-' | b'.' | b'0'..=b'9' => return self.parse_any_number(),
108                b'n' => {
109                    if self.is_indent(b"nil") {
110                        self.pos += 3; // Skip "nil"
111                        Ok(Value::Null)
112                    } else {
113                        self.parse_key_val()
114                    }
115                }
116                b'_' | b'a'..=b'z' | b'A'..=b'Z' | b']' => return self.parse_key_val(),
117                b'{' => return self.parse_array(),
118                _ => return self.error(format!("unexpected token: {}", t)),
119            },
120            None => return self.error("unexpected eof"),
121        }
122    }
123
124    fn parse_array(&mut self) -> Result<Value> {
125        self.erase_whitespace();
126        self.parse_indent(b"{")?;
127        let mut array = Vec::new();
128        loop {
129            self.erase_whitespace();
130            match self.peek() {
131                Some(b'}') => {
132                    self.eat_char();
133                    break;
134                }
135                Some(_) => {
136                    let val = self.parse_value()?;
137                    array.push(val);
138                    match self.peek() {
139                        Some(b',') => {
140                            self.eat_char();
141                        }
142                        _ => {}
143                    }
144                }
145                None => return self.error("unexpected eof"),
146            }
147        }
148        Ok(Value::Array(array))
149    }
150
151    fn parse_any_number(&mut self) -> Result<Value> {
152        self.erase_whitespace();
153        let start = self.pos;
154        while let Some(c) = self.peek() {
155            if c == b'.' || c == b'-' || c.is_ascii_digit() {
156                self.eat_char();
157            } else {
158                break;
159            }
160        }
161        let s = std::str::from_utf8(&self.str[start..self.pos])?;
162        if s.contains('.') {
163            s.parse()
164                .map(Value::Float)
165                .map_err(|e| self.error2(format!("failed to parse f64: {}", e)))
166        } else {
167            s.parse()
168                .map(Value::Int)
169                .map_err(|e| self.error2(format!("failed to parse i64: {}", e)))
170        }
171    }
172
173    fn parse_any_str(&mut self) -> Result<String> {
174        self.erase_whitespace();
175        match self.peek().ok_or(self.error2("unexpected eof"))? {
176            b'"' => self.parse_str(),
177            b'[' => self.parse_raw_str(),
178            _ => self.error("expected string or raw string"),
179        }
180    }
181
182    fn parse_f64(&mut self) -> Result<f64> {
183        self.erase_whitespace();
184        let start = self.pos;
185        while let Some(c) = self.peek() {
186            if c == b'.' || c == b'-' || c.is_ascii_digit() {
187                self.eat_char();
188            } else {
189                break;
190            }
191        }
192        let s = std::str::from_utf8(&self.str[start..self.pos])?;
193        s.parse()
194            .map_err(|e| self.error2(format!("failed to parse f64: {}", e)))
195    }
196
197    fn parse_str(&mut self) -> Result<String> {
198        self.erase_whitespace();
199        self.parse_indent(b"\"")?;
200        let start = self.pos;
201        let mut pc = None;
202        let end = loop {
203            match self.next() {
204                Some(c) => {
205                    if c == b'"' {
206                        if pc.is_none_or(|x| x != b'\\') {
207                            break self.pos - 1;
208                        }
209                    }
210                    pc = Some(c);
211                }
212                None => return self.error("unexpected eof"),
213            }
214        };
215        Ok(unescape_lua_str(
216            &decode_to_string(self.encoding, &self.str[start..end], true)
217                .map_err(|e| self.error2(e))?,
218        ))
219    }
220
221    fn parse_raw_str(&mut self) -> Result<String> {
222        self.erase_whitespace();
223        self.parse_indent(b"[[")?;
224        let start = self.pos;
225        let mut pc = None;
226        let end = loop {
227            match self.next() {
228                Some(c) => {
229                    if c == b']' {
230                        if pc.is_some_and(|x| x == b']') {
231                            break self.pos - 2;
232                        }
233                    }
234                    pc = Some(c);
235                }
236                None => return self.error("unexpected eof"),
237            }
238        };
239        decode_to_string(self.encoding, &self.str[start..end], true).map_err(|e| self.error2(e))
240    }
241
242    fn erase_whitespace(&mut self) {
243        while let Some(c) = self.peek() {
244            if c == b' ' || c == b'\t' || c == b'\n' || c == b'\r' {
245                if c == b'\n' {
246                    self.line += 1;
247                    self.line_index = 1;
248                } else {
249                    self.line_index += 1;
250                }
251                self.eat_char();
252            } else {
253                break;
254            }
255        }
256    }
257
258    fn next(&mut self) -> Option<u8> {
259        if self.pos < self.len {
260            let c = self.str[self.pos];
261            self.pos += 1;
262            if c == b'\n' {
263                self.line += 1;
264                self.line_index = 1;
265            } else {
266                self.line_index += 1;
267            }
268            Some(c)
269        } else {
270            None
271        }
272    }
273
274    fn peek(&self) -> Option<u8> {
275        if self.pos < self.len {
276            Some(self.str[self.pos])
277        } else {
278            None
279        }
280    }
281
282    fn parse_key_val(&mut self) -> Result<Value> {
283        let key = self.get_indent()?;
284        self.parse_equal()?;
285        let val = self.parse_value()?;
286        Ok(Value::KeyVal((Box::new(key), Box::new(val))))
287    }
288
289    fn get_indent(&mut self) -> Result<Value> {
290        self.erase_whitespace();
291        let start = self.pos;
292        let mut is_first = true;
293        let end = loop {
294            match self.peek() {
295                Some(t) => match t {
296                    b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'"' => self.eat_char(),
297                    b'[' => {
298                        self.eat_char();
299                        let v = self.parse_value()?;
300                        let n = self.next().ok_or(self.error2("unexpected eof"))?;
301                        if n != b']' {
302                            return self.error("expected ']' after key");
303                        }
304                        return Ok(v);
305                    }
306                    b'0'..=b'9' => {
307                        if is_first {
308                            return self.error("unexpected digit");
309                        }
310                        self.eat_char();
311                    }
312                    b' ' | b'\t' | b'=' | b'\n' | b'\r' => break self.pos,
313                    _ => return self.error("unexpected token"),
314                },
315                None => return self.error("unexpected eof"),
316            }
317            is_first = false;
318        };
319        let mut data = &self.str[start..end];
320        if data.starts_with(b"[\"") && data.ends_with(b"\"]") {
321            data = &data[2..data.len() - 2];
322        }
323        Ok(Value::Str(
324            decode_to_string(self.encoding, data, true).map_err(|e| self.error2(e))?,
325        ))
326    }
327
328    fn is_indent(&self, indent: &[u8]) -> bool {
329        if self.pos + indent.len() > self.len {
330            return false;
331        }
332        for (i, c) in indent.iter().enumerate() {
333            if self.str[self.pos + i] != *c {
334                return false;
335            }
336        }
337        true
338    }
339
340    fn parse_indent(&mut self, indent: &[u8]) -> Result<()> {
341        for c in indent {
342            match self.next() {
343                Some(x) => {
344                    if x != *c {
345                        return self.error("unexpected indent");
346                    }
347                }
348                None => return self.error("unexpected eof"),
349            }
350        }
351        Ok(())
352    }
353
354    fn eat_char(&mut self) {
355        if self.pos < self.len {
356            self.pos += 1;
357        }
358    }
359
360    fn error2<T>(&self, msg: T) -> anyhow::Error
361    where
362        T: std::fmt::Display,
363    {
364        anyhow::Error::msg(format!(
365            "Failed to parse at position line {} column {} (byte {}): {}",
366            self.line, self.line_index, self.pos, msg
367        ))
368    }
369
370    fn error<T, A>(&self, msg: T) -> Result<A>
371    where
372        T: std::fmt::Display,
373    {
374        Err(anyhow::Error::msg(format!(
375            "Failed to parse at position line {} column {} (byte {}): {}",
376            self.line, self.line_index, self.pos, msg
377        )))
378    }
379}